#!/usr/bin/env python

########################################################
# Part of MILIM NIRDAFOT project.
# Written by H. Magal, 2014
# See details on the project at the site:
#   https://sites.google.com/site/nirdafotdownloads/home
########################################################

# The basis for the list of word lists is the PLxxx.csv files (after being processed by create_sorted_CSV_file.py).
# This script reads the output.csv file,
# pickles the DB version + synonyms list into the file synonyms.data,
# and packs that file into the archive nirdafotDB.<version>.tar.gz.

import codecs
import re
import os
import tarfile
import myversion
import pickle

def HebrewDecoder(name):
    """Build the synonyms database archive from a UTF-16 CSV file.

    Each line of the input file is cut at the first ',PL<digits>' marker
    (the start of the trailing PLxxxxxx.csv file name); the text before the
    marker is split on commas into a list of words.  The resulting list of
    word lists is pickled, preceded by myversion.version_number, into
    'synonyms.data'.  That file is then packed into the gzip-compressed
    archive 'nirdafotDB.<version>.tar.gz' and deleted.

    Args:
        name: path of the input CSV file (opened with the 'utf_16' codec).

    Returns:
        None.  The only result is the archive, written relative to the
        current working directory.
    """
    # Compiled once: marks where the trailing PLxxxxxx.csv file name begins.
    source_marker = re.compile(r',PL[0-9]+')

    # 'with' releases the input handle even if decoding raises.
    with codecs.open(filename=name, encoding='utf_16') as fin:
        lines = fin.readlines()

    outlist = []
    for line in lines:                          # outer loop on lines of input file
        # Drop the line terminator first, so that a line lacking the PLxxx
        # marker does not carry its newline into the last word.
        words_part = source_marker.split(line.rstrip('\r\n'), 1)[0]
        outlist.append(words_part.split(','))   # one list of words per input line

    list_name = 'synonyms.data'
    # The prefix 'nirdafotDB.' is a notation known to the SW upgrade module, so don't change it!
    tar_name = 'nirdafotDB.' + myversion.version_number + '.tar.gz'

    ######################################
    # write the final synonyms.data file #
    ######################################
    with open(list_name, 'wb') as fout:
        # Two consecutive pickles in one file: the version number first,
        # then the list of word lists.
        pickle.dump(myversion.version_number, fout, pickle.HIGHEST_PROTOCOL)
        pickle.dump(outlist, fout, pickle.HIGHEST_PROTOCOL)

    ############################
    # write the final TAR file #
    ############################
    try:
        with tarfile.open(name=tar_name, mode='w:gz') as tarout:
            tarout.add(name=list_name)
    finally:
        # Remove the intermediate synonyms list whether or not archiving
        # succeeded, so a failed run leaves no stale file behind.
        os.remove(list_name)


########## main code ##########
if __name__ == '__main__':
    HebrewDecoder('output.csv')
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and the Python 3 print() function, so the
    # file no longer fails to parse on Python 3.
    print('  <><><> Don\'t forget to copy the TAR file to the \'Engine\' folder! <><><>')